August 12th, 2015
library(pitchRx)
# returns a list of related tables (see diagram below)
# NOTE: this keeps every scraped table in memory; `dat` is reassigned further
# down once the data has been stored in (and queried from) a database.
dat <- scrape(start = "2008-01-01", end = Sys.Date())
# Open the SQLite file "pitchRx.sqlite3" (created if it does not exist yet).
db <- dplyr::src_sqlite(path = "pitchRx.sqlite3", create = TRUE)
# Scrape from 2008-01-01 through today, writing through the database
# connection instead of returning the tables.
pitchRx::scrape(
  start = "2008-01-01",
  end = Sys.Date(),
  connect = db$con
)
# Presumably tops the database up with games played since the last scrape --
# confirm against the pitchRx documentation.
pitchRx::update_db(db$con)
Player and date information is recorded at the at-bat level.
library(dplyr)
# At-bats in which Yu Darvish pitched to Albert Pujols on the date stored as
# "2013_04_24" (still a lazy database query at this point).
atbats <- filter(
  tbl(db, "atbat"),
  pitcher_name == "Yu Darvish",
  batter_name == "Albert Pujols",
  date == "2013_04_24"
)
# Keep only the pitches belonging to those at-bats (matched on at-bat number
# and game id), pull them into memory, and pass them to animateFX().
tbl(db, "pitch") %>%
  inner_join(atbats, by = c("num", "gameday_link")) %>%
  collect() %>%
  pitchRx::animateFX()
# condition on umpire decisions
# Keep only pitches the umpire had to call, and encode the call as 0/1 so it
# can serve as a binomial response.
pitches <- tbl(db, "pitch") %>%
  filter(des %in% c("Called Strike", "Ball")) %>%
  mutate(strike = as.numeric(des == "Called Strike"))
# goal is to compare 2008 to 2014
atbats <- tbl(db, "atbat") %>%
  # `date` is stored like "2013_04_24", so the year is its first four
  # characters. Using (1L, 4L) gives the same answer whether the call is
  # evaluated by R, translated by dplyr, or passed straight to SQLite; the
  # previous (5L, -4L) form only worked through SQLite's negative-length
  # behaviour and yields an empty string otherwise.
  mutate(year = substr(date, 1L, 4L)) %>%
  filter(year %in% c("2008", "2014"))
# Join on the at-bat keys only: a natural join would also try to match every
# other column the two tables happen to share. An inner join drops pitches
# from seasons other than 2008/2014 instead of keeping them with missing
# at-bat information. collect() brings the result into memory as a data
# frame, which is what model-fitting functions expect.
dat <- pitches %>%
  inner_join(atbats, by = c("num", "gameday_link")) %>%
  collect()
library(mgcv)
# Logistic GAM for the probability of a called strike: a separate intercept
# and a separate smooth over pitch location (px, pz) for every combination of
# batter stance, year and count.
# 48 (2 x 2 x 12) surfaces!
m <- bam(
  formula = strike ~ interaction(stand, year, count) +
    s(px, pz, by = interaction(stand, year, count)),
  data = dat,
  family = binomial(link = "logit")
)
# Plot the fitted model, contrasting the year == "2008" subset with the
# year == "2014" subset, in a grid of panels: one row per count, one column
# per batter stance.
pitchRx::strikeFX(
  dat,
  model = m,
  density1 = list(year = "2008"),
  density2 = list(year = "2014"),
  layer = ggplot2::facet_grid(count ~ stand)
)
However, called strikes up-and-in or up-and-away are much less likely nowadays (2014) than they were in 2008.
Special thanks to:
* Brian Mills for comments/discussions on pitchRx and GAMs.
* Mike Lopez for the invitation.